# this script checks balance between attribute orders

# remotes::install_github("leeper/cregg")
library(dplyr)
library(cobalt)
library(haven)
library(huxtable)
library(ggplot2)
library(cregg)


# Load the conjoint data (enriched with all variables from the second-wave
# survey) and convert it with haven's as_factor() in a single step.
df <- as_factor(
  read_sav("drafts/conjoint_solidarity/perspectives_docs/revision_2/replication_code/input_data.sav")
)


# df <- read_sav("drafts/conjoint_solidarity/data/conjoint_september_2022_socdem.sav")
# df <- as_factor(df)


# Frequencies of the conjoint attribute levels, computed with cregg
# (figure 2 in the appendix). `f1` is the conjoint formula reused further down.
f1 <- choice ~ Ethnicity + Age + Gender + Children + Motivation + Profession

plot_freq <- df %>%
  cj_freqs(f1, id = ~ResponseId) %>%
  plot()

ggsave(
  filename = "drafts/conjoint_solidarity/plots/diagnostics/attr_freqs.png",
  plot = plot_freq,
  width = 7,
  height = 5
)


# Left/right profile diagnostics (figure 4 in the appendix): marginal means
# estimated separately by `profile`, plotted with a reference line at 0.5.
df <- df %>%
  mutate(profile = as_factor(profile))

plot_prof_effects <- df %>%
  cj(f1, id = ~ResponseId, by = ~profile, estimate = "mm") %>%
  plot(group = "profile", vline = 0.5)

ggsave(
  filename = "drafts/conjoint_solidarity/plots/diagnostics/profile_effects.png",
  plot = plot_prof_effects,
  width = 7,
  height = 5
)


# For each conjoint attribute: a bar chart of level counts filled by profile,
# followed by chi-square tests of the attribute against task and profile.

# Ethnicity

ggplot(df, aes(y = Ethnicity, fill = profile)) +
  geom_bar()

# Table of profile-by-ethnicity proportions from cregg
cj_props(df, ~profile + Ethnicity, id = ~ResponseId)

# Chi-square tests for Ethnicity
chisq.test(df$Ethnicity, df$task)
chisq.test(df$Ethnicity, df$profile)

# Motivation

ggplot(df, aes(y = Motivation, fill = profile)) +
  geom_bar()

# Chi-square tests for Motivation
chisq.test(df$Motivation, df$task)
chisq.test(df$Motivation, df$profile)

# Age

ggplot(df, aes(y = Age, fill = profile)) +
  geom_bar()

# Chi-square tests for Age
chisq.test(df$Age, df$task)
chisq.test(df$Age, df$profile)

# Profession

ggplot(df, aes(y = Profession, fill = profile)) +
  geom_bar()

# Chi-square tests for Profession
chisq.test(df$Profession, df$task)
chisq.test(df$Profession, df$profile)

# Children

ggplot(df, aes(y = Children, fill = profile)) +
  geom_bar()

# Chi-square tests for Children
chisq.test(df$Children, df$task)
chisq.test(df$Children, df$profile)

# Gender

ggplot(df, aes(y = Gender, fill = profile)) +
  geom_bar()

# Chi-square tests for Gender
chisq.test(df$Gender, df$task)
chisq.test(df$Gender, df$profile)

############################################################
# Covariate balance checks for the randomization

# Balance test for respondent age with cregg: marginal means of
# `age_respondent` across attribute levels. The vertical line marks the
# sample mean of respondent age (missing values dropped).
df %>%
  mm(
    age_respondent ~ Ethnicity + Age + Gender + Children + Motivation + Profession,
    id = ~ResponseId
  ) %>%
  plot(
    xlim = c(32, 35),
    vline = mean(df$age_respondent, na.rm = TRUE)
  )


# Socio-demographic covariates used in the balance tables
soc_dem_vars <- df %>%
  dplyr::select(sex, education, child_bin, income_consum, locality_cize)

# Numeric 0/1 recodes of sex and higher education. Any value that matches
# neither of the two listed labels (including missing values) becomes NA.
df <- df %>%
  mutate(
    female = case_when(
      sex == "1. Female" ~ 1,
      sex == "0. Male" ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  mutate(
    higher_edu_bin = case_when(
      higher_education == "1. Higher education" ~ 1,
      higher_education == "0. Less than higher" ~ 0,
      TRUE ~ NA_real_
    )
  )

# Reduced covariate set used for the saved love plots.
# NOTE(review): this set contains `sex`, whereas the love.plot renaming table
# (`vec_rename`) lists "female" -- confirm which of the two is intended here.
soc_dem_vars_short <- df %>%
  dplyr::select(
    sex,
    age_respondent,
    higher_edu_bin,
    income_consum_num,
    polit_civic_index
  )


# Political and civic covariates for the balance checks

df <- df %>%
  # take the "safe" item when it is present, otherwise the "unsafe" one
  mutate(
    polit_90_meet_any = if_else(
      is.na(polit_90_meet_safe),
      polit_90_meet_unsafe,
      polit_90_meet_safe
    )
  ) %>%
  # civic index: row sum of the five items, NA only when all five are missing
  mutate(
    civic_index = ifelse(
      rowSums(is.na(dplyr::select(
        .,
        polit_merged_help_ukr,
        polit_merged_help_rus,
        polit_merged_help_ngo,
        polit_merged_help_ngo_loc,
        polit_merged_volunt
      ))) == 5,
      NA,
      rowSums(
        dplyr::select(
          .,
          polit_merged_help_ukr,
          polit_merged_help_rus,
          polit_merged_help_ngo,
          polit_merged_help_ngo_loc,
          polit_merged_volunt
        ),
        na.rm = TRUE
      )
    )
  ) %>%
  # political index: row sum of the three items, NA only when all are missing
  mutate(
    polit_index = ifelse(
      rowSums(is.na(dplyr::select(
        .,
        polit_merged_meet_safe,
        polit_merged_meet_unsafe,
        polit_merged_text
      ))) == 3,
      NA,
      rowSums(
        dplyr::select(
          .,
          polit_merged_meet_safe,
          polit_merged_meet_unsafe,
          polit_merged_text
        ),
        na.rm = TRUE
      )
    )
  ) %>%
  # binary versions: 1 when the index is positive, 0 otherwise (NA stays NA)
  mutate(
    polit_bin = ifelse(polit_index > 0, 1, 0),
    civic_bin = ifelse(civic_index > 0, 1, 0)
  )

polit_vars <- df %>%
  dplyr::select(
    polit_merged_help_rus,
    polit_merged_help_ngo,
    politics_interest_ru_num,
    polit_bin,
    civic_bin,
    polit_index,
    civic_index
  )

# Ethnicity attribute check
# Balance table of the soc-dem and political covariates across Ethnicity levels
bal_tab_ <- bal.tab(Ethnicity ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Ethnicity")

# Keep the first five columns of the across-pairs summary, with the row names
# moved into a "Variable" column, and render them as a huxtable
table_df <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(table_df) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()


# Renaming table (old name -> display label) passed to love.plot() via
# `var.names`; it is reused by the love plots of the other attributes as well
vec_rename <- data.frame(old = c("age_respondent", "higher_edu_bin", "income_consum_num", "polit_civic_index", "female"),
                         new = c("Age", "Has higher education", "Income (1-6)", "Political/civic index (0-8)", "Female"))


# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_ethn <- love.plot(Ethnicity ~ soc_dem_vars_short, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Ethnicity",
          var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_ethnicity.jpeg',
  plot = balanc_check_ethn,
  width = 7,
  height = 5)


# Love plot for the political covariates (displayed only, not saved)
love.plot(Ethnicity ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Ethnicity (polit variables)")

# Motivation attribute check
# Balance table of the soc-dem and political covariates across Motivation levels
bal_tab_ <- bal.tab(Motivation ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Motivation")

# First five columns of the across-pairs summary, rendered as a huxtable
df_table <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(df_table) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()

# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_motiv <- love.plot(Motivation ~ soc_dem_vars_short, data = df,
                               var.order = "alphabetical", binary = "std",
                               stats = c("mean.diffs"),
                               abs = TRUE,
                               thresholds = c(m = .1, v = 2),
                               title = "Motivation",
                               var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_motivation.jpeg',
  plot = balanc_check_motiv,
  width = 7,
  height = 5)


# Love plots for the full soc-dem and political sets (displayed only)
love.plot(Motivation ~ soc_dem_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Motivation (soc-dem variables)")

love.plot(Motivation ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Motivation (polit variables)")

# Age attribute check
# Balance table of the soc-dem and political covariates across Age levels
bal_tab_ <- bal.tab(Age ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Age")

# First five columns of the across-pairs summary, rendered as a huxtable
df_table <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(df_table) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()


# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_age <- love.plot(Age ~ soc_dem_vars_short, data = df,
                                var.order = "alphabetical", binary = "std",
                                stats = c("mean.diffs"),
                                abs = TRUE,
                                thresholds = c(m = .1, v = 2),
                                title = "Age",
                                var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_age.jpeg',
  plot = balanc_check_age,
  width = 7,
  height = 5)

# Love plots for the full soc-dem and political sets (displayed only)
love.plot(Age ~ soc_dem_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Age (soc-dem variables)")

# The title previously read "Motivation (polit variables)", a copy-paste
# leftover; this plot is for the Age attribute
love.plot(Age ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Age (polit variables)")

# Profession attribute check
# Balance table of the soc-dem and political covariates across Profession levels
bal_tab_ <- bal.tab(Profession ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Profession")

# First five columns of the across-pairs summary, rendered as a huxtable
df_table <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(df_table) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()


# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_prof <- love.plot(Profession ~ soc_dem_vars_short, data = df,
                              var.order = "alphabetical", binary = "std",
                              stats = c("mean.diffs"),
                              abs = TRUE,
                              thresholds = c(m = .1, v = 2),
                              title = "Profession",
                              var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_profession.jpeg',
  plot = balanc_check_prof,
  width = 7,
  height = 5)

# Love plots for the full soc-dem and political sets (displayed only)
love.plot(Profession ~ soc_dem_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Profession (soc-dem variables)")

love.plot(Profession ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Profession (polit variables)")

# Gender attribute check
# Balance table of the soc-dem and political covariates across Gender levels
bal_tab_ <- bal.tab(Gender ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Gender")

# First five columns of the across-pairs summary, rendered as a huxtable
df_table <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(df_table) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()

# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_gender <- love.plot(Gender ~ soc_dem_vars_short, data = df,
                               var.order = "alphabetical", binary = "std",
                               stats = c("mean.diffs"),
                               abs = TRUE,
                               thresholds = c(m = .1, v = 2),
                               title = "Gender",
                               var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_gender.jpeg',
  plot = balanc_check_gender,
  width = 7,
  height = 5)

# Love plots for the full soc-dem and political sets (displayed only)
love.plot(Gender ~ soc_dem_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Gender (soc-dem variables)")

love.plot(Gender ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Gender (polit variables)")

# Children attribute check
# Balance table of the soc-dem and political covariates across Children levels
bal_tab_ <- bal.tab(Children ~ soc_dem_vars + polit_vars, data = df,
                    var.order = "alphabetical", binary = "std",
                    stats = c("mean.diffs", "variance.ratios"),
                    abs = TRUE,
                    thresholds = c(m = .1, v = 2),
                    title = "Children")

# First five columns of the across-pairs summary, rendered as a huxtable
df_table <- tibble::rownames_to_column(bal_tab_$Balance.Across.Pairs[1:5], "Variable")

as_hux(df_table) %>%
  set_font_size(8) %>%
  set_row_height(0.1) %>%
  theme_article() %>%
  set_all_borders()


# Love plot on the reduced covariate set (legend hidden); saved to disk and
# reused in the combined figure
balanc_check_child <- love.plot(Children ~ soc_dem_vars_short, data = df,
                                 var.order = "alphabetical", binary = "std",
                                 stats = c("mean.diffs"),
                                 abs = TRUE,
                                 thresholds = c(m = .1, v = 2),
                                 title = "Children",
                                 var.names = vec_rename) + theme(legend.position = "none")

ggsave(
  'drafts/conjoint_solidarity/plots/diagnostics/balance_children.jpeg',
  plot = balanc_check_child,
  width = 7,
  height = 5)


# Love plots for the full soc-dem and political sets (displayed only).
# NOTE(review): `quick` and `disp.v.ratio` are only passed in this one call;
# `disp.v.ratio` looks redundant with `stats` already requesting variance
# ratios -- confirm against the installed cobalt version before removing.
love.plot(Children ~ soc_dem_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Children (soc-dem variables)",
          quick = FALSE,
          disp.v.ratio = TRUE)

love.plot(Children ~ polit_vars, data = df,
          var.order = "alphabetical", binary = "std",
          stats = c("mean.diffs", "variance.ratios"),
          abs = TRUE,
          thresholds = c(m = .1, v = 2),
          title = "Children (polit variables)")



library(patchwork)

# Figure 3 in the appendix: the six love plots composed with patchwork
# into three stacked rows of two panels each
balance_check_all <-
  (balanc_check_age + balanc_check_prof) /
  (balanc_check_motiv + balanc_check_gender) /
  (balanc_check_child + balanc_check_ethn)

ggsave(
  filename = "drafts/conjoint_solidarity/plots/diagnostics/balance_checks_all.jpeg",
  plot = balance_check_all,
  width = 7,
  height = 8
)
